This notebook is part of an exploratory analysis of machine learning used to decompose hyperspectral datasets of hybrid perovskite nanoscale materials.
Two machine learning models are primarily used: Nonnegative Matrix Factorization and Variational Autoencoders
Notebook One: Preprocessing
Imports, Functions, Classes, and Colors¶
Imports¶
import numpy as np
import random
import h5py
import matplotlib.pyplot as plt; plt.rcParams['figure.dpi'] = 300
import matplotlib as mpl
from matplotlib.gridspec import GridSpec
from matplotlib.ticker import MaxNLocator
from matplotlib.animation import FuncAnimation, ArtistAnimation
from celluloid import Camera
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
from typing import Optional, Union, List, Tuple
from scipy.ndimage import median_filter, gaussian_filter
from scipy.signal import find_peaks, savgol_filter
from skimage.restoration import denoise_wavelet
save_directory = "./png-figures/preprocessing/" # modify to save figures as a specified file extension
file_extension = ".png"
Functions and Classes¶
def cropper(wavelength_file, spectra_file, shortwavelength, longwavelength):
# This function allows us to crop all wavelengths below and above a specified value
# Returns new wavelengths object and new spectra file
if (
shortwavelength < wavelength_file.min()
or longwavelength > wavelength_file.max()
):
print(
"Desired wavelength exceeds range. Choose a value between {}nm and {}nm".format(
wavelength_file.min(), wavelength_file.max()
)
)
return
y = []
shortwavelengthindex = find_nearest(wavelength_file, shortwavelength)
longwavelengthindex = find_nearest(wavelength_file, longwavelength)
x = wavelength_file[shortwavelengthindex:longwavelengthindex]
for i in range(len(spectra_file[shortwavelengthindex:longwavelengthindex][:])):
y.append(spectra_file[shortwavelengthindex:longwavelengthindex][i])
y = np.asarray(y)
z = longwavelengthindex - shortwavelengthindex
return x, y, z
def find_nearest(array, value):
array = np.asarray(array)
idx = (np.abs(array - value)).argmin()
return idx
def normalize(array):
return (array - array.min()) / (array.max() - array.min())
class data:
def __init__(self, datapath):
self.h5 = h5py.File(datapath, "r")
self.meas0 = acquisition(self.h5["Acquisition0"])
self.meas1 = acquisition(self.h5["Acquisition1"])
self.meas2 = acquisition(self.h5["Acquisition2"])
self.meas3 = acquisition(self.h5["Acquisition3"])
return
def close(self):
return self.h5.close()
class acquisition:
def __init__(self, acquisition):
self.name = acquisition["PhysicalData"]["ChannelDescription"][()][0]
self.xdim = acquisition["ImageData"]["DimensionScaleX"][()]
self.ydim = acquisition["ImageData"]["DimensionScaleY"][()]
self.zdim = acquisition["ImageData"]["DimensionScaleZ"][()]
self.image = acquisition["ImageData"]["Image"][()].squeeze()
try:
self.xpix = self.image.shape[0]
self.ypix = self.image.shape[1]
except IndexError:
self.xpix = self.image.shape[0]
self.ypix = 0
self.acqname = acquisition.name
if self.acqname == "/Acquisition2":
self.wavelengths = acquisition["ImageData"]["DimensionScaleC"][()] * 1e9
try:
self.xpix = self.image.shape[2]
self.ypix = self.image.shape[1]
self.zpix = self.image.shape[0]
except IndexError:
self.xpix = self.image.shape[1]
self.ypix = 0
return
def denoise(self, subtractbackground=False, minusmedian=False):
if self.acqname != "/Acquisition2":
print("Can only denoise CL, please choose CL measure.")
pass
denoise_kwargs = dict(
multichannel=False,
convert2ycbcr=False,
wavelet="sym8",
rescale_sigma=False,
wavelet_levels=6,
mode="soft",
)
if self.ypix == 0:
spectra = self.image.transpose(1, 0)
spectra = [savgol_filter(s, 5, 2) for s in spectra]
spectra = [median_filter(s, size=(5)) for s in spectra]
if subtractbackground==True:
spectra = [s-np.mean(s[0:200]) for s in spectra]
spectra = [s.clip(min=0) for s in spectra]
return np.reshape(spectra, (self.xpix, self.image.shape[0]))
else:
spectra = self.transposedata().reshape(
self.xpix * self.ypix, self.image.shape[0]
)
denoise_kwargs = dict(
multichannel=False,
convert2ycbcr=False,
wavelet="sym8",
rescale_sigma=False,
wavelet_levels=6,
mode="soft",
)
spectra = [savgol_filter(s, 5, 2) for s in spectra]
spectra = [median_filter(s, size=(5)) for s in spectra]
if subtractbackground==True:
spectra = [s-np.mean(s[0:200]) for s in spectra]
spectra = [s.clip(min=0) for s in spectra]
return np.reshape(spectra, (self.xpix, self.ypix, self.image.shape[0]))
Colors¶
Uncomment to select color palette
from IPython.display import Markdown, display
#color = ['#995CF1', '#82DB78', '#B5D37C', '#8DAFAE', '#A2152D', '#AF922E'] #1h
#color = ['#FDD143', '#F86506', '#8A2537', '#05385F', '#037EA0', '#4CCBEE', '#FCE3AC']
#color = ['#921001', '#E45C4C', '#B0E2FF', '#F3859E', '#4C2E73', '#5240AC', '#7763D3', '#CB9AF3']
#color = ['#F3D646', '#01596D', '#001D44', '#B5227F', '#CB971D', '#01306D', '#600049']
#color = ['#B5227F', '#7763D3', '#2CBDC2', '#67BB30', '#E5E522', '#046C95', '#028A46', '#CB9AF3']
#color = ['#995CF1', '#82DB78', '#BBFFFF', '#A2152D', '#FFEC8B', '#7D26CD', '#159947', '#8DAFAE']
#color = ['#5e4fa2', '#3288bd', '#66c2a5', '#abdda4', '#e6f598', '#fee08b', '#fdae61',
# '#f46d43', '#d53e4f', '#9e0142']
#color = ['#2A99EB', '#4CAF50', '#F1EB6C', '#EFAC53', '#DA6832', '#FF2400', '#5e4fa2']
#color = ['#006341', '#CC8A00', '#D82342', '#AA9767', '#80BC00']
#color = ['#d90202', '#8413ed', '#13b7ed', '#13ed54']
#color = ['#f44336', '#b15beb', '#03A9F4', '#80BC00', '#FFEB3B', '#FF9800']
#color = ['#A7226E', '#EC2049', '#F7DB4F', '#2F9599', '#F26B38']
random_color = ["#"+''.join([random.choice('0123456789ABCDEF') for j in range(6)])
for i in range(100)]
#color = ['#3a86ff', '#ffbe0b', '#ff006e', '#8338ec', '#fb5607', '#390099'] * 1000
color = ["#0095EF", "#EFCA00", "#F31D64", "#6A38B3",
"#FE433C", "#3C5081", "#43c68b", "#A224AD"] * 1000
#display(Markdown('<br>'.join(
# f'<span style="font-family: monospace">{color} <span style="color: {color}">████████</span></span>'
# for color in color
#)))
Reading Data¶
Datasets and Figures¶
The datasets used in this project are not currently published and are therefore not available within this repository. Two hyperspectral images of hybrid perovskites were taken via cathodoluminescence spectroscopy at Oak Ridge National Laboratory and are available on the Cheaha Supercomputer at the University of Alabama at Birmingham. For further information, feel free to contact me at jperkin4@uab.edu
Image 1: Scanning Electron Microscopy 2D image
Image 2: Cathodoluminescence Microscopy hyperspectral 3D image
filepath1 = "20220507_MAPbI3Grain1_5kV_0p11nA_2kx.nosync.h5"
filepath2 = "20220508_MAPbI3Grain5EdgeMap_5kV_14pA_16kx.nosync.h5"
#filepath1 = "20220507_MAPbI3Grain1_5kV_0p11nA_2kx.h5"
#filepath2 = "20220508_MAPbI3Grain5EdgeMap_5kV_14pA_16kx.h5"
ds1 = data(filepath1)
ds2 = data(filepath2)
# Wavelength values (1024 wavelengths in the z dimension)
f1_wav = ds1.meas2.wavelengths
f2_wav = ds2.meas2.wavelengths
f1_wav_orig = f1_wav
f2_wav_orig = f1_wav
# Grain 1: Wide-field, low resolution hybrid perovskite (HP) grain
f1 = data(filepath1).h5
# Grain 2: High-resolution HP grain boundary
f2 = data(filepath2).h5
f1_img1 = acquisition(f1["Acquisition1"]).image
f1_img2 = acquisition(f1["/Acquisition2"]).image
f1_xpix = acquisition(f1["/Acquisition2"]).xpix
f1_ypix = acquisition(f1["/Acquisition2"]).ypix
f1_zpix = acquisition(f1["/Acquisition2"]).zpix
f1_zpix_orig = f1_zpix
f1_img2_2d = np.reshape(f1_img2, (f1_zpix, f1_ypix*f1_xpix))
f2_img1 = acquisition(f2["Acquisition1"]).image
f2_img2 = acquisition(f2["/Acquisition2"]).image
f2_xpix = acquisition(f2["/Acquisition2"]).xpix
f2_ypix = acquisition(f2["/Acquisition2"]).ypix
f2_zpix = acquisition(f2["/Acquisition2"]).zpix
f2_zpix_orig = f2_zpix
f2_img2_2d = np.reshape(f2_img2, (f2_zpix, f2_ypix*f2_xpix))
print("f1 dimensions: " + str(f1_xpix) + ", " + str(f1_ypix) + ", " + str(f1_zpix))
print("f2 dimensions: " + str(f2_xpix) + ", " + str(f2_ypix) + ", " + str(f2_zpix))
f1 dimensions: 164, 122, 1024 f2 dimensions: 91, 77, 1024
Now, we will crop the data to only include wavelengths between 350 nm and 900 nm. No relevant peaks will fall outside of this range.
# Bottom and top of range for spectra
short_wav = 350
long_wav = 900
f1_wav, f1_img2_cropped, f1_zpix = cropper(f1_wav, f1_img2_2d, short_wav, long_wav)
f2_wav, f2_img2_cropped, f2_zpix = cropper(f2_wav, f2_img2_2d, short_wav, long_wav)
f1_img2 = np.reshape(f1_img2_cropped, (f1_zpix, f1_ypix, f1_xpix))
f2_img2 = np.reshape(f2_img2_cropped, (f2_zpix, f2_ypix, f2_xpix))
f1_pix = f1_zpix * f1_ypix * f1_xpix
f2_pix = f2_zpix * f2_ypix * f2_xpix
dif1 = f1_zpix_orig - f1_zpix
dif2 = f2_zpix_orig - f2_zpix
print("Number of cropped pixels from first grain: " + str(dif1))
print("Number of cropped pixels from second grain: " + str(dif2))
dif1 = find_nearest(f1_wav_orig, short_wav)
dif2 = find_nearest(f2_wav_orig, short_wav)
print("Number of cropped wavelengths from bottom range of first grain: " + str(dif1))
print("Number of cropped wavelengths from bottom range of second grain: " + str(dif2))
Number of cropped pixels from first grain: 379 Number of cropped pixels from second grain: 379 Number of cropped wavelengths from bottom range of first grain: 219 Number of cropped wavelengths from bottom range of second grain: 219
Cosmic rays¶
Cosmic rays saturate the detector and create spectra that do not correspond to anything physical in the material. Thus, it is necessary to remove them before proceeding with any analysis to ensure the quality of the data.
The first function makes a list of some points in the dataset affected by cosmic rays. The function determines determines if the spectral intensity at that wavelength has increased by another specified multiple (jump1) than that wavelength in the previous pixel (in the image dimension). Then, these points are plotted to verify they are indeed cosmic rays. These affected points are replaced with the mean of the surrounding points by a radius of 4 and replotted to show the resulting spectra.
The second finder function if the point has a spectral intensity greater than a specified multiple (jump2) of the previous wavelength in the spectrum (in the spectral dimension). Similar to the process above, these points are plotted, the cosmic rays are removed, and they are plotted again.
Finding cosmic rays 1¶
This function is an initial sweep through the data to find obvious cosmic rays. It finds them through dramatic changes in the image dimension.
def cosmic_ray_finder_1(img, jump1): # Image dimension
# Identifies some pixels affected by cosmic rays and returns a list of points
pts = []
for z in range(len(img)):
for y in range(len(img[z])):
for x in range(len(img[z][y])):
if x == 0: # Left edge
if y == 0: # Top left corner
if (img[z,y,x] > jump1*(img[z,y,x+1])
or img[z,y,x] > jump1*(img[z,y+1,x])):
pts.append([z,y,x])
elif y == (len(img[z])-1): # Bottom left corner
if (img[z,y,x] > jump1*(img[z,y,x+1])
or img[z,y,x] > jump1*(img[z,y-1,x])):
pts.append([z,y,x])
else:
if (img[z,y,x] > jump1*(img[z,y,x+1]) or img[z,y,x] > jump1*(img[z,y-1,x])
or img[z,y,x] > jump1*(img[z,y+1,x])):
pts.append([z,y,x])
elif x == (len(img[z][y])-1): # Right edge
if y == 0: # Top right corner
if (img[z,y,x] > jump1*(img[z,y,x-1])
or img[z,y,x] > jump1*(img[z,y+1,x])):
pts.append([z,y,x])
elif y == (len(img[z])-1): # Bottom right corner
if (img[z,y,x] > jump1*(img[z,y,x-1])
or img[z,y,x] > jump1*(img[z,y-1,x])):
pts.append([z,y,x])
else:
if (img[z,y,x] > jump1*(img[z,y,x-1]) or img[z,y,x] > jump1*(img[z,y-1,x])
or img[z,y,x] > jump1*(img[z,y+1,x])):
pts.append([z,y,x])
elif y == 0: # Top edge (not including corners)
if (img[z,y,x] > jump1*(img[z,y,x-1]) or img[z,y,x] > jump1*(img[z,y,x+1])
or img[z,y,x] > jump1*(img[z,y+1,x])):
pts.append([z,y,x])
elif y == (len(img[z])-1): # Bottom edge (not including corners)
if (img[z,y,x] > jump1*(img[z,y,x-1]) or img[z,y,x] > jump1*(img[z,y,x+1])
or img[z,y,x] > jump1*(img[z,y-1,x])):
pts.append([z,y,x])
else:
if (img[z,y,x] > jump1*(img[z,y,x-1]) or img[z,y,x] > jump1*(img[z,y,x+1])
or img[z,y,x] > jump1*(img[z,y-1,x]) or img[z,y,x] > jump1*(img[z,y+1,x])):
pts.append([z,y,x])
return pts
f1_cosmic_pts_1 = cosmic_ray_finder_1(f1_img2, 7)
f2_cosmic_pts_1 = cosmic_ray_finder_1(f2_img2, 7)
print("Grain 1 cosmic points: ", f1_cosmic_pts_1, "\nGrain 2 cosmic points: ", f2_cosmic_pts_1)
Grain 1 cosmic points: [] Grain 2 cosmic points: [[1, 40, 0], [2, 40, 0], [3, 40, 0], [7, 40, 0], [11, 68, 13], [26, 27, 61], [30, 33, 54], [77, 67, 49], [78, 11, 73], [78, 12, 31], [78, 67, 49], [79, 12, 31], [80, 12, 31], [81, 12, 31], [92, 2, 14], [119, 73, 42], [160, 21, 63], [167, 75, 0], [196, 11, 45], [227, 16, 2], [251, 1, 33], [260, 22, 14], [261, 22, 14], [262, 22, 14], [280, 22, 77], [284, 42, 2], [285, 42, 2], [287, 42, 2], [358, 21, 61], [372, 38, 76], [393, 22, 8], [414, 59, 86], [415, 59, 86], [415, 60, 64], [416, 59, 86], [433, 62, 45], [434, 62, 45], [435, 62, 45], [455, 26, 53], [456, 26, 53], [457, 26, 53], [536, 57, 56], [559, 5, 40]]
Plot code¶
# This plots all the cosmic points in a single GIF to be quickly analyzed
def cosmic_gif(suptitle, img, spect_pt, cosmic_pts, wav, save=False):
if len(cosmic_pts) == 0: # If the cosmic points array is empty
print("No cosmic rays identified")
return
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 3))
fig.suptitle(suptitle, fontsize=10)
camera = Camera(fig)
ax1.set_xticks([], labels=None)
ax1.set_yticks([], labels=None)
ax2.set_ylim(0, 3000)
ax2.set_xlim(wav[0]-5, wav[-1]+5)
#for i in range(len(cosmic_pts)):
for i in range(len(cosmic_pts)):
ax1.imshow(img[spect_pt-short_wav, :, :], animated=True)
ax1.plot(cosmic_pts[i][2], cosmic_pts[i][1], "X", ms=5, c=color[i])
ax2.plot(wav, img[:, cosmic_pts[i][1], cosmic_pts[i][2]], color=color[i],
animated=True)
ax2.axvline(x=wav[cosmic_pts[i][0]], c='black', lw=1, linestyle=':')
camera.snap()
#ax2.clear()
anim = camera.animate()
if save:
anim.save("./png-figures/cosmic-rays/" + suptitle + ".gif", fps=5)
else:
plt.show()
Plot 1 (before removal)¶
cosmic_gif("1) Grain 1 Cosmic Ray Points (before removal 1)", f1_img2, 780, f1_cosmic_pts_1, f1_wav)
No cosmic rays identified
cosmic_gif("2) Grain 2 Cosmic Ray Points (before removal 1)", f2_img2, 780, f2_cosmic_pts_1, f2_wav)
Extra Plot Code¶
# This plots all the points at once, but can often be too large
def cosmic_plot(suptitle, img, spect_point, cosmic_points, wav):
rows = len(cosmic_points)
columns = 2
fig = plt.figure(figsize=(20, 3*rows))
#fig.suptitle(suptitle, fontsize=30)
gs = GridSpec(rows, columns)
row_count = 0
for i in range(rows):
# img
fig_img = fig.add_subplot(gs[row_count,0])
fig_img.imshow(img[spect_point,:,:])
fig_img.set_title(str(cosmic_points[row_count]))
fig_img.plot(cosmic_points[row_count][2], cosmic_points[row_count][1], "X",
ms=10, c=color[row_count])
# plt
fig_plt = fig.add_subplot(gs[row_count,1])
#fig_plt.set_title("Spectra")
fig_plt.plot(wav, img[:,cosmic_points[row_count][1], cosmic_points[row_count][2]],
c=color[row_count], lw=5)
fig_plt.axvline(x=cosmic_points[row_count][0], c='black', lw=1, linestyle=':')
row_count += 1
gs.tight_layout(fig, rect=[0,0.03,1,0.95])
plt.show()
# This plots all the cosmic points in a single GIF to be quickly analyzed
def cosmic_gif1(suptitle, img, spect_pt, cosmic_pts, wav):
if len(cosmic_pts) == 0: # If the cosmic points array is empty
print("No cosmic rays identified")
return
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(7, 3))
fig.suptitle(suptitle, fontsize=10)
ax1.imshow(img[spect_pt, :, :], animated=True)
ax1.set_xticks([], labels=None)
ax1.set_yticks([], labels=None)
ax2.set_ylim(0, 3000)
ax2.set_xlim(wav[0]-5, wav[-1]+5)
frames = []
def animate(i):
im1 = ax1.plot(cosmic_pts[i][2], cosmic_pts[i][1], "X", ms=5, c=color[i])
return im1
for i in range(len(cosmic_pts)):
#im1 = ax1.plot(cosmic_pts[i][2], cosmic_pts[i][1], "X", ms=5, c=color[i])
im2, = ax2.plot(wav, img[:, cosmic_pts[i][1], cosmic_pts[i][2]], color=color[i],
animated=True)
#ax2.axvline(x=cosmic_pts[i][0], c='black', lw=3, linestyle=':')
#frames.append([im1, im2])
frames.append(im2)
anim1 = FuncAnimation(fig, animate)
anim2 = ArtistAnimation(fig, frames)
plt.show()
# This plots all the cosmic points in a single GIF to be quickly analyzed
def cosmic_gif2(suptitle, img, spect_pt, cosmic_pts, wav):
if len(cosmic_pts) == 0: # If the cosmic points array is empty
print("No cosmic rays identified")
return
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(7, 3))
fig.suptitle(suptitle, fontsize=10)
ax1.set_xticks([], labels=None)
ax1.set_yticks([], labels=None)
ax2.set_ylim(0, 3000)
ax2.set_xlim(wav[0]-5, wav[-1]+5)
frames = []
#for i in range(len(cosmic_pts)):
for i in range(len(cosmic_pts)):
im1 = ax1.imshow(img[spect_pt, :, :], animated=True)
ax1.plot(cosmic_pts[i][2], cosmic_pts[i][1], "X", ms=5, c=color[i])
im2, = ax2.plot(wav, img[:, cosmic_pts[i][1], cosmic_pts[i][2]], color=color[i],
animated=True)
#ax2.axvline(x=cosmic_pts[i][0], c='black', lw=3, linestyle=':')
frames.append([im1, im2])
anim = ArtistAnimation(fig, frames)
plt.show()
Old plots¶
With jump2 = 2.5; no jump1
F1: [[0, 29, 123], [2, 109, 8], [76, 60, 133], [79, 82, 68], [97, 25, 3], [117, 68, 56], [130, 35, 39], [132, 66, 74], [142, 110, 26], [143, 94, 148], [351, 119, 91], [355, 97, 2], [396, 27, 118], [422, 98, 105], [438, 114, 102], [463, 110, 62], [529, 66, 127], [552, 121, 121], [571, 65, 90], [590, 86, 26], [598, 39, 131], [624, 87, 70], [626, 103, 144], [634, 89, 155], [753, 121, 17], [818, 107, 47], [867, 67, 74], [870, 104, 29], [929, 55, 4], [929, 87, 32], [942, 65, 91], [980, 94, 24], [1019, 82, 111]]
F2: [[19, 45, 26], [20, 45, 26], [25, 20, 53], [41, 13, 65], [59, 65, 45], [65, 13, 65], [66, 57, 14], [80, 23, 74], [93, 23, 21], [97, 6, 20], [110, 52, 20], [122, 48, 11], [135, 61, 32], [148, 76, 65], [161, 38, 25], [199, 23, 21], [209, 3, 61], [214, 72, 12], [215, 30, 89], [220, 40, 0], [221, 40, 0], [223, 61, 4], [224, 68, 4], [227, 62, 17], [230, 68, 13], [236, 19, 44], [245, 27, 61], [248, 33, 54], [249, 33, 54], [254, 9, 13], [258, 27, 56], [291, 61, 4], [294, 44, 61], [294, 64, 14], [296, 11, 73], [296, 67, 49], [297, 11, 73], [297, 12, 31], [298, 12, 31], [311, 2, 14], [329, 48, 87], [332, 44, 50], [333, 61, 42], [335, 73, 42], [363, 43, 24], [377, 21, 63], [379, 21, 63], [386, 75, 0], [390, 48, 40], [398, 24, 0], [401, 43, 24], [415, 11, 45], [416, 66, 89], [446, 16, 2], [454, 44, 90], [460, 27, 73], [464, 7, 23], [469, 1, 33], [470, 1, 33], [479, 22, 14], [499, 22, 77], [503, 42, 2], [539, 26, 6], [566, 22, 39], [577, 21, 61], [591, 38, 76], [612, 22, 8], [630, 66, 89], [632, 59, 86], [633, 59, 86], [634, 60, 64], [652, 62, 45], [656, 59, 2], [674, 26, 53], [712, 44, 58], [755, 57, 56], [764, 71, 39], [778, 5, 40], [851, 14, 33], [884, 2, 6], [918, 30, 79], [919, 30, 79], [927, 6, 32], [932, 67, 69], [939, 55, 34], [955, 10, 26], [963, 51, 7], [964, 52, 0], [965, 52, 0], [967, 62, 56], [968, 62, 56]]
#cosmic_plot("Figure 1", f1_img2, 710, f1_cosmic_points, f1_wav)
#cosmic_plot("Figure 2", f2_img2, 405, f2_cosmic_points, f2_wav)